Return Home


German Credit Data Analysis(Python)¶

Problem¶

1. Determine the optimum age to target for customers¶

2.Determine the type of loan that attracts most clients¶

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [4]:
german_credit_df = pd.read_csv(r"C:\Users\jki\Downloads\german_credit_data.csv")
german_credit_df.head(5)
Out[4]:
Unnamed: 0 Age Sex Job Housing Saving accounts Checking account Credit amount Duration Purpose
0 0 67 male 2 own NaN little 1169 6 radio/TV
1 1 22 female 2 own little moderate 5951 48 radio/TV
2 2 49 male 1 own little NaN 2096 12 education
3 3 45 male 2 free little little 7882 42 furniture/equipment
4 4 53 male 2 free little little 4870 24 car
In [5]:
print(german_credit_df.columns)
Index(['Unnamed: 0', 'Age', 'Sex', 'Job', 'Housing', 'Saving accounts',
       'Checking account', 'Credit amount', 'Duration', 'Purpose'],
      dtype='object')
In [6]:
print("Purpose : ",german_credit_df.Purpose.unique())
print("Sex : ",german_credit_df.Sex.unique())
print("Housing : ",german_credit_df.Housing.unique())
print("Saving accounts : ",german_credit_df['Saving accounts'].unique())
print("Checking account : ",german_credit_df['Checking account'].unique())
Purpose :  ['radio/TV' 'education' 'furniture/equipment' 'car' 'business'
 'domestic appliances' 'repairs' 'vacation/others']
Sex :  ['male' 'female']
Housing :  ['own' 'free' 'rent']
Saving accounts :  [nan 'little' 'quite rich' 'rich' 'moderate']
Checking account :  ['little' 'moderate' nan 'rich']
In [7]:
german_credit_df['Saving accounts'] = german_credit_df['Saving accounts'].map({"little":0,"moderate":1,"quite rich":2 ,"rich":3 });
german_credit_df['Saving accounts'] = german_credit_df['Saving accounts'].fillna(german_credit_df['Saving accounts'].dropna().mean())

german_credit_df['Checking account'] = german_credit_df['Checking account'].map({"little":0,"moderate":1,"rich":2 });
german_credit_df['Checking account'] = german_credit_df['Checking account'].fillna(german_credit_df['Checking account'].dropna().mean())

german_credit_df['Sex'] = german_credit_df['Sex'].map({"male":0,"female":1}).astype(float);

german_credit_df['Housing'] = german_credit_df['Housing'].map({"own":0,"free":1,"rent":2}).astype(float);

german_credit_df['Purpose'] = german_credit_df['Purpose'].map({'radio/TV':0, 'education':1, 'furniture/equipment':2, 'car':3, 'business':4,
       'domestic appliances':5, 'repairs':6, 'vacation/others':7}).astype(float);

german_credit_df.head(10)
Out[7]:
Unnamed: 0 Age Sex Job Housing Saving accounts Checking account Credit amount Duration Purpose
0 0 67 0.0 2 0.0 0.456548 0.000000 1169 6 0.0
1 1 22 1.0 2 0.0 0.000000 1.000000 5951 48 0.0
2 2 49 0.0 1 0.0 0.000000 0.651815 2096 12 1.0
3 3 45 0.0 2 1.0 0.000000 0.000000 7882 42 2.0
4 4 53 0.0 2 1.0 0.000000 0.000000 4870 24 3.0
5 5 35 0.0 1 1.0 0.456548 0.651815 9055 36 1.0
6 6 53 0.0 2 0.0 2.000000 0.651815 2835 24 2.0
7 7 35 0.0 3 2.0 0.000000 1.000000 6948 36 3.0
8 8 61 0.0 1 0.0 3.000000 0.651815 3059 12 0.0
9 9 28 0.0 3 0.0 0.000000 1.000000 5234 30 3.0
In [9]:
plt.scatter(german_credit_df['Credit amount'],german_credit_df["Age"])
plt.figure()
Out[9]:
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
In [12]:
sns.pairplot(german_credit_df)
Out[12]:
<seaborn.axisgrid.PairGrid at 0x1b118e61650>
In [14]:
plt.scatter(german_credit_df['Credit amount'],german_credit_df["Duration"])
plt.figure()
Out[14]:
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
In [15]:
plt.scatter(german_credit_df['Saving accounts'],german_credit_df["Duration"])
plt.figure()
Out[15]:
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
In [16]:
fig = german_credit_df["Purpose"].hist(bins=8)
fig.text(-1, 150, 'Frequency', ha='center')
fig.text(0, -30, 'Radio', ha='center')
fig.text(1, -50, 'education', ha='center')
fig.text(2, -30, 'furniture', ha='center')
fig.text(3, -50, 'car', ha='center')
fig.text(4, -30, 'business', ha='center')
fig.text(5, -50, 'appliances', ha='center')
fig.text(6, -30, 'repairs', ha='center')
fig.text(7, -50, 'vacation', ha='center')
Out[16]:
Text(7, -50, 'vacation')
In [17]:
limitedCredit = german_credit_df[(german_credit_df["Credit amount"]<=5000)==True];
imitedCredit = german_credit_df[(german_credit_df["Credit amount"]>2000)==True];
fig = limitedCredit["Purpose"].hist(bins=8)
fig.text(-1, 150, 'Frequency', ha='center')
fig.text(0, -30, 'Radio', ha='center')
fig.text(1, -50, 'education', ha='center')
fig.text(2, -30, 'furniture', ha='center')
fig.text(3, -50, 'car', ha='center')
fig.text(4, -30, 'business', ha='center')
fig.text(5, -50, 'appliances', ha='center')
fig.text(6, -30, 'repairs', ha='center')
fig.text(7, -50, 'vacation', ha='center')
Out[17]:
Text(7, -50, 'vacation')
In [18]:
fig =german_credit_df.Age.hist(bins=60)
fig.text(40, -10, 'Age', ha='center')
fig.text(0, 40, 'Frequency', ha='center')
Out[18]:
Text(0, 40, 'Frequency')
In [19]:
fig = german_credit_df["Job"].hist()
fig.text(-0.5, 400, 'Frequency', ha='center')
fig.text(0, -100, 'UnSkilled', ha='center')
fig.text(1, -100, 'UnSkilled Resident', ha='center')
fig.text(2, -100, 'Skilled', ha='center')
fig.text(3, -100, 'Highly Skilled', ha='center')
Out[19]:
Text(3, -100, 'Highly Skilled')

Result:¶

  1. People from Age 23 to 32 are the target customer and the amount can be in range 2000 to 5000 .
  2. Offers for car loan and radio loan can pick up more customers or lenders.
  3. Short term credit with credit range 2000 t0 5000 yield maximum customer and profits.
In [20]:
from sklearn.cluster import KMeans;
from sklearn.decomposition import PCA; 
from sklearn.preprocessing import normalize;
y = KMeans().fit_predict(german_credit_df)
X_norm = normalize(german_credit_df);
y_PCA = PCA(n_components=2).fit_transform(X_norm,2);
y_PCA.shape
C:\Users\jki\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
Out[20]:
(1000, 2)
In [22]:
plt.scatter(german_credit_df['Credit amount'],german_credit_df['Age'],c=y)
plt.figure()
plt.scatter(y_PCA[:,0],y_PCA[:,1],c=y)
Out[22]:
<matplotlib.collections.PathCollection at 0x1b1247c90d0>
In [ ]: